* Session setup: start with no data in memory, raise the matrix-size limit,
* and move to the project folder. The matsize setting was previously issued
* twice; once is enough.
clear
set matsize 5000
* NOTE(review): <directory> is a placeholder and must be replaced by the
* folder that holds the weather CSV files before running.
cd <directory>

**********************************************************************************************************************************
**This file cleans the weather data and prepares it to be merged (into the productivity data, attendance data, and salary data)
**********************************************************************************************************************************

* Convert every CSV file in the working directory to Stata format and stack
* the results into one dataset.
local files : dir "`c(pwd)'" files "*.csv"

* Stop with a clear message instead of failing later on an empty dataset.
if `"`files'"' == "" {
    display as error "no .csv files found in `c(pwd)'"
    exit 601
}

* Each CSV is saved next to itself as <name>.csv.dta. File names are quoted
* so that names containing spaces are read and written correctly.
foreach file of local files {
    insheet using "`file'", clear
    * v11 is dropped from every file (presumably an empty column produced by
    * a trailing delimiter -- TODO confirm against the raw CSVs).
    drop v11
    save "`file'.dta", replace
}

* Append exactly the files converted above. Listing "*.dta" in the folder
* would also pull in any other Stata file stored there (for example the
* units file, or this script's own outputs when it is run a second time).
clear
foreach file of local files {
    append using "`file'.dta"
}
* Turn the text date (read as month-day-year) into a Stata daily date and
* split it into calendar components.
generate date1 = date(date, "MDY")
generate year  = year(date1)
generate month = month(date1)
generate day   = day(date1)

* Sample window: discard everything before 2010 and all of 2014.
drop if year < 2010 | year == 2014

* Daily mean temperature as the row mean of the daily maximum and minimum.
egen meant = rmean(maxtemperature mintemperature)

* Heat measures based on the daily mean temperature (meant).
foreach t of varlist meant {

    * rho<var>: humidity term built from observed relative humidity and the
    * temperature; wbgt<var>: linear combination of temperature and rho.
    generate rho`t'  = (relativehumidity/100) * 6.105*exp(17.27*`t'/ (237.7 + `t'))
    generate wbgt`t' = 0.567*`t' + 0.216*rho`t'+ 3.38

    * Same two measures with relative humidity fixed at 68.92163 percent
    * (presumably a sample average -- TODO confirm the source of this value).
    generate rho`t'2  = (68.92163/100) * 6.105*exp(17.27*`t'/ (237.7 +`t'))
    generate wbgt`t'2 = 0.567*`t' + 0.216*rho`t'2+ 3.38

}

* Heat index. The temperature is first converted with the Celsius-to-
* Fahrenheit formula; the polynomial coefficients match the U.S. National
* Weather Service (Rothfusz) heat-index regression.
generate meantf = meant*(9/5)+32
generate himeantf = -42.379 ///
    + 2.04901523*meantf ///
    + 10.14333127*relativehumidity ///
    - .22475541*meantf*relativehumidity ///
    - .00683783*meantf*meantf ///
    - .05481717*relativehumidity*relativehumidity ///
    + .00122874*meantf*meantf*relativehumidity ///
    + .00085282*meantf*relativehumidity*relativehumidity ///
    - .00000199*meantf*meantf*relativehumidity*relativehumidity

* High-humidity adjustment (relative humidity above 85, temperature 80-87 F).
* Grouping uses parentheses, the documented grouping syntax in Stata
* expressions, rather than square brackets.
* NOTE(review): the published NWS procedure also has a low-humidity
* adjustment and a simpler formula for cool conditions; neither is applied
* here -- confirm this is intended.
replace himeantf = himeantf + ((relativehumidity-85)/10) * ((87-meantf)/5) ///
    if relativehumidity>85 & meantf>=80 & meantf<=87

* Heat index converted back from Fahrenheit.
generate himeant = (himeantf-32)*5/9

save "weather_2010to2013", replace

* Heat measures based on the daily minimum temperature.
use "weather_2010to2013", clear

* "mintemp" is a variable abbreviation. foreach ... of varlist expands it to
* the full variable name (presumably mintemperature), so the new variables
* are named rho<fullname> and wbgt<fullname>.
foreach t of varlist mintemp {

    generate rho`t'  = (relativehumidity/100) * 6.105*exp(17.27*`t'/ (237.7 + `t'))
    generate wbgt`t' = 0.567*`t' + 0.216*rho`t'+ 3.38

}

* Heat index: convert with the Celsius-to-Fahrenheit formula, then apply the
* polynomial whose coefficients match the U.S. National Weather Service
* (Rothfusz) heat-index regression.
generate mintempf = mintemp*(9/5)+32
generate himintempf = -42.379 ///
    + 2.04901523*mintempf ///
    + 10.14333127*relativehumidity ///
    - .22475541*mintempf*relativehumidity ///
    - .00683783*mintempf*mintempf ///
    - .05481717*relativehumidity*relativehumidity ///
    + .00122874*mintempf*mintempf*relativehumidity ///
    + .00085282*mintempf*relativehumidity*relativehumidity ///
    - .00000199*mintempf*mintempf*relativehumidity*relativehumidity

* High-humidity adjustment (relative humidity above 85, temperature 80-87 F),
* grouped with parentheses rather than square brackets.
replace himintempf = himintempf + ((relativehumidity-85)/10) * ((87-mintempf)/5) ///
    if relativehumidity>85 & mintempf>=80 & mintempf<=87

* Heat index converted back from Fahrenheit.
generate himintemp = (himintempf-32)*5/9


* Heat measures based on the daily maximum temperature.
* "maxtemp" is a variable abbreviation. foreach ... of varlist expands it to
* the full variable name (presumably maxtemperature), so the new variables
* are named rho<fullname> and wbgt<fullname>.
foreach t of varlist maxtemp {

    generate rho`t'  = (relativehumidity/100) * 6.105*exp(17.27*`t'/ (237.7 + `t'))
    generate wbgt`t' = 0.567*`t' + 0.216*rho`t'+ 3.38

}

* Heat index: convert with the Celsius-to-Fahrenheit formula, then apply the
* polynomial whose coefficients match the U.S. National Weather Service
* (Rothfusz) heat-index regression.
generate maxtempf = maxtemp*(9/5)+32
generate himaxtempf = -42.379 ///
    + 2.04901523*maxtempf ///
    + 10.14333127*relativehumidity ///
    - .22475541*maxtempf*relativehumidity ///
    - .00683783*maxtempf*maxtempf ///
    - .05481717*relativehumidity*relativehumidity ///
    + .00122874*maxtempf*maxtempf*relativehumidity ///
    + .00085282*maxtempf*relativehumidity*relativehumidity ///
    - .00000199*maxtempf*maxtempf*relativehumidity*relativehumidity

* High-humidity adjustment (relative humidity above 85, temperature 80-87 F),
* grouped with parentheses rather than square brackets.
replace himaxtempf = himaxtempf + ((relativehumidity-85)/10) * ((87-maxtempf)/5) ///
    if relativehumidity>85 & maxtempf>=80 & maxtempf<=87

* Heat index converted back from Fahrenheit.
generate himaxtemp = (himaxtempf-32)*5/9


* Daily lags and leads (1 to 7 rows) and one-week backward / forward averages
* for each temperature and heat measure.
*
* The data hold more than one weather location (the matching step later in
* this file de-duplicates on lat lon), so lags and leads must be taken within
* a location. Sorting on year month day alone puts different locations on
* adjacent rows, and [_n-1] would then read another location's value.
* lat and lon are written the same way as elsewhere in this file so that
* they resolve to the same coordinate variables.
* NOTE(review): lags are by row, not by calendar date -- this assumes each
* location has exactly one row per consecutive day; confirm there are no gaps.
foreach v of varlist mintemperature maxtemperature himintemp himaxtemp wbgtmintemp wbgtmaxtemp {

    forvalues k = 1/7 {
        bysort lat lon (year month day): generate `v'lag`k'  = `v'[_n-`k']
        bysort lat lon (year month day): generate `v'lead`k' = `v'[_n+`k']
    }

    * Mean of the previous seven rows (missing if any of them is missing).
    bysort lat lon (year month day): generate `v'week = ///
        (`v'[_n-1]+`v'[_n-2]+`v'[_n-3]+`v'[_n-4]+`v'[_n-5]+`v'[_n-6]+`v'[_n-7])/7

    * Mean of the following seven rows (missing if any of them is missing).
    bysort lat lon (year month day): generate `v'leadweek = ///
        (`v'[_n+1]+`v'[_n+2]+`v'[_n+3]+`v'[_n+4]+`v'[_n+5]+`v'[_n+6]+`v'[_n+7])/7

}

* The saved file is sorted by location and then by date.
save "weather_2010to2013_allweeklags", replace

* Data file received with a list of factories and their latitude and longitude.
* One extra row is appended for unit 22, and the merge key mtempid is rebuilt
* as the running row number.
use "units_geocode_mtempid.dta", clear

local last_row = _N + 1
set obs `last_row'

* Fill in the new (last) row, then set the coordinates of unit 22.
replace unit = 22 in `last_row'
replace lat = 11.039446 if unit == 22
replace lon = 77.313592 if unit == 22

drop mtempid
generate mtempid = _n

save "tamu_unitmergeincluding22.dta", replace


* For every unit, find the closest weather location.
use "weather_2010to2013_allweeklags", clear
keep lat lon
duplicates drop lat lon, force

* Build every (weather location, unit) pair: each location is repeated once
* per row of the units file and numbered 1..N so that it lines up with the
* units file's mtempid. N is read from the file rather than hard-coded (it
* was 41), so the pairing stays complete if units are added or removed.
quietly describe using "tamu_unitmergeincluding22.dta"
local n_units = r(N)
expand `n_units'
bys lat lon: g mtempid=_n
merge m:1 mtempid using "tamu_unitmergeincluding22.dta"

* sphdist is a user-written command and must be installed separately.
sphdist, lat1(lat) lat2(latitude) lon1(lon) lon2(longitude) gen(dist)

* Keep, for each unit, the pair(s) with the smallest distance.
* NOTE(review): exact ties would leave more than one row per unit.
bys unit: egen mindist=min(dist)
keep if dist==mindist
keep unit latitude longitude
save "tamu_unitmerge", replace
 
 *Unit 11 assigned same coordinates as Unit 12.

* Units file extended by two rows: unit 22 (second-to-last row) and unit 11
* (last row). The merge key mtempid is then rebuilt as the running row number.
use "units_geocode_mtempid.dta", clear

local row22 = _N + 1
local row11 = _N + 2
set obs `row11'

* Unit 22 and its coordinates.
replace unit = 22 in `row22'
replace lat = 11.039446 if unit == 22
replace lon = 77.313592 if unit == 22

* Unit 11 and its coordinates.
replace unit = 11 in `row11'
replace lat = 12.88728 if unit == 11
replace lon = 77.60259 if unit == 11

drop mtempid
generate mtempid = _n

save "unitsincluding11and22_geocode_mtempid", replace


* Match weather locations to the extended unit list (including units 11 and 22).
* NOTE(review): this reads "weather_2010to2014_allweeklags", while the file
* saved earlier in this script is "weather_2010to2013_allweeklags". Confirm
* that the 2010to2014 file exists and is the intended input.
use "weather_2010to2014_allweeklags", clear
duplicates drop lat lon, force

* Build every (weather location, unit) pair. The number of copies is read
* from the units file rather than hard-coded (it was 42), so the pairing
* stays complete if the unit list changes.
quietly describe using "unitsincluding11and22_geocode_mtempid.dta"
local n_units = r(N)
expand `n_units'
bys lat lon: g mtempid=_n
merge m:1 mtempid using "unitsincluding11and22_geocode_mtempid.dta"

* sphdist is a user-written command and must be installed separately.
sphdist, lat1(lat) lat2(latitude) lon1(lon) lon2(longitude) gen(dist)

* Keep the closest pair(s) within each lat lon group.
bys lat lon: egen mindist=min(dist)
keep if dist==mindist
keep lat* lon* dist elevation unit
save "tamu_allunitsmerge.dta", replace
